From a65c5d799d59fb722f56aae29f6553def67bbf99 Mon Sep 17 00:00:00 2001
From: Timothy Pearson <tpearson@raptorengineeringinc.com>
Date: Sun, 26 Jul 2015 00:55:43 -0500
Subject: [PATCH 085/143] amd/amdmct/mct_ddr3: Partially fix up registered
 DIMMs on Fam10h

Sufficient support has been added to allow booting with registered
DIMMs on the KGPE-D16 in certain slots.  ECC support needs additional
work; the ECC data lanes appear to cause boot failures in some slots.

Change-Id: Ieaf4cbf351908e5a89760be49a6667dc55dbc575
Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
---
 src/northbridge/amd/amdmct/mct_ddr3/mct_d.c    |  196 ++++++++++++++++++++++--
 src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c |   32 ++--
 src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c |   58 ++++---
 src/northbridge/amd/amdmct/mct_ddr3/mctrci.c   |  151 ++++++++++++------
 src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c   |    8 +
 src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c   |   26 ++--
 src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c  |   72 ++++++---
 7 files changed, 399 insertions(+), 144 deletions(-)

diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
index 9a86c08..b29ff3c 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
@@ -305,6 +305,120 @@ static uint16_t mhz_to_memclk_config(uint16_t freq)
 		return fam10h_mhz_to_memclk_config(freq) + 1;
 }
 
+static uint32_t fam10h_address_timing_compensation_code(struct DCTStatStruc *pDCTstat, uint8_t dct)
+{	/* Selects the address timing code for one DCT; the result stays 0 when no case below matches */
+	uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH);
+
+	uint8_t package_type;
+	uint32_t calibration_code = 0;
+
+	package_type = mctGet_NVbits(NV_PACK_TYPE);
+	uint16_t MemClkFreq = (Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x7) + 1;	/* low 3 bits of DCT register 0x94, plus one */
+
+	/* Obtain number of DIMMs on channel */
+	uint8_t dimm_count = pDCTstat->MAdimms[dct];
+	uint8_t rank_count_dimm0;
+	uint8_t rank_count_dimm1;
+
+	if (package_type == PT_GR) {
+		/* Socket G34 */
+		if (pDCTstat->Status & (1 << SB_Registered)) {
+			/* RDIMM */
+			/* Fam10h BKDG Rev. 3.62 section 2.8.9.5.8 Tables 60 - 61 */
+			if (MaxDimmsInstallable == 1) {
+				if (MemClkFreq == 0x4) {
+					/* DDR3-800 */
+					calibration_code = 0x00000000;
+				} else if (MemClkFreq == 0x5) {
+					/* DDR3-1066 */
+					calibration_code = 0x003c3c3c;
+				} else if (MemClkFreq == 0x6) {
+					/* DDR3-1333 */
+					calibration_code = 0x003a3a3a;
+				}
+			} else if (MaxDimmsInstallable == 2) {
+				if (dimm_count == 1) {
+					/* 1 DIMM detected */
+					if (MemClkFreq == 0x4) {
+						/* DDR3-800 */
+						calibration_code = 0x00000000;
+					} else if (MemClkFreq == 0x5) {
+						/* DDR3-1066 */
+						calibration_code = 0x003c3c3c;
+					} else if (MemClkFreq == 0x6) {
+						/* DDR3-1333 */
+						calibration_code = 0x003a3a3a;
+					}
+				} else if (dimm_count == 2) {
+					/* 2 DIMMs detected */
+					if (MemClkFreq == 0x4) {
+						/* DDR3-800 */
+						calibration_code = 0x00000000;
+					} else if (MemClkFreq == 0x5) {
+						/* DDR3-1066 */
+						calibration_code = 0x003a3c3a;
+					} else if (MemClkFreq == 0x6) {
+						/* DDR3-1333 */
+						calibration_code = 0x00383a38;
+					}
+				}
+			} else if (MaxDimmsInstallable == 3) {
+				/* TODO
+				 * 3 DIMM/channel support unimplemented; the code stays 0
+				 */
+			}
+		} else {
+			/* UDIMM */
+			/* Fam10h BKDG Rev. 3.62 section 2.8.9.5.8 Table 56 */
+			if (dimm_count == 1) {
+				/* 1 DIMM detected */
+				rank_count_dimm0 = pDCTstat->DimmRanks[(1 * 2) + dct];	/* NOTE(review): same index as dimm1 in the 2 DIMM case - confirm a lone DIMM is expected there */
+
+				if (MemClkFreq == 0x4) {
+					/* DDR3-800 */
+					if (rank_count_dimm0 == 1)
+						calibration_code = 0x00000000;
+					else
+						calibration_code = 0x003b0000;
+				} else if (MemClkFreq == 0x5) {
+					/* DDR3-1066 */
+					if (rank_count_dimm0 == 1)
+						calibration_code = 0x00000000;
+					else
+						calibration_code = 0x00380000;
+				} else if (MemClkFreq == 0x6) {
+					/* DDR3-1333 */
+					if (rank_count_dimm0 == 1)
+						calibration_code = 0x00000000;
+					else
+						calibration_code = 0x00360000;
+				}
+			} else if (dimm_count == 2) {
+				/* 2 DIMMs detected; the rank counts are fetched but not consulted by the checks below */
+				rank_count_dimm0 = pDCTstat->DimmRanks[(0 * 2) + dct];
+				rank_count_dimm1 = pDCTstat->DimmRanks[(1 * 2) + dct];
+
+				if (MemClkFreq == 0x4) {
+					/* DDR3-800 */
+					calibration_code = 0x00390039;
+				} else if (MemClkFreq == 0x5) {
+					/* DDR3-1066 */
+					calibration_code = 0x00350037;
+				} else if (MemClkFreq == 0x6) {
+					/* DDR3-1333 */
+					calibration_code = 0x00000035;
+				}
+			}
+		}
+	} else {
+		/* TODO
+		 * Other socket support unimplemented; the code stays 0
+		 */
+	}
+
+	return calibration_code;
+}
+
 static uint32_t fam15h_phy_predriver_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength)
 {
 	uint8_t lrdimm = 0;
@@ -999,7 +1113,7 @@ static uint32_t fam15h_address_timing_compensation_code(struct DCTStatStruc *pDC
 			/* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 74 */
 			if (MaxDimmsInstallable == 1) {
 				if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
-					/* DDR3-667 - DDR3-800*/
+					/* DDR3-667 - DDR3-800 */
 					calibration_code = 0x00000000;
 				} else if (MemClkFreq == 0xa) {
 					/* DDR3-1066 */
@@ -1015,7 +1129,7 @@ static uint32_t fam15h_address_timing_compensation_code(struct DCTStatStruc *pDC
 				if (dimm_count == 1) {
 					/* 1 DIMM detected */
 					if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
-						/* DDR3-667 - DDR3-800*/
+						/* DDR3-667 - DDR3-800 */
 						calibration_code = 0x00000000;
 					} else if (MemClkFreq == 0xa) {
 						/* DDR3-1066 */
@@ -1030,7 +1144,7 @@ static uint32_t fam15h_address_timing_compensation_code(struct DCTStatStruc *pDC
 				} else if (dimm_count == 2) {
 					/* 2 DIMMs detected */
 					if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) {
-						/* DDR3-667 - DDR3-800*/
+						/* DDR3-667 - DDR3-800 */
 						calibration_code = 0x00000000;
 					} else if (MemClkFreq == 0xa) {
 						/* DDR3-1066 */
@@ -1308,6 +1422,26 @@ static void read_spd_bytes(struct MCTStatStruc *pMCTstat,
 	}
 }
 
+#ifdef DEBUG_DIMM_SPD
+static void dump_spd_bytes(struct MCTStatStruc *pMCTstat,
+			struct DCTStatStruc *pDCTstat, uint8_t dimm)
+{
+	uint16_t offset;	/* SPD byte offset; doubles as the column index for the header row */
+	/* Print pDCTstat->spd_data.spd_bytes[dimm][0..255] as a 16-column hex table */
+	printk(BIOS_DEBUG, "SPD dump for DIMM %d\n   ", dimm);
+	for (offset = 0; offset < 16; offset++) {
+		printk(BIOS_DEBUG, "%02x ", offset);
+	}
+	for (offset = 0; offset < 256; offset++) {
+		if (!(offset % 16)) {
+			printk(BIOS_DEBUG, "\n%02x ", offset / 16);
+		}
+		printk(BIOS_DEBUG, "%02x ", pDCTstat->spd_data.spd_bytes[dimm][offset]);
+	}
+	printk(BIOS_DEBUG, "\n");
+}
+#endif
+
 #if IS_ENABLED(CONFIG_HAVE_ACPI_RESUME)
 static void calculate_and_store_spd_hashes(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat)
@@ -1508,12 +1642,14 @@ restartinit:
 		pMCTstat->GStatus |= 1 << GSB_ConfigRestored;
 #endif
 
-		printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
-		for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
-			struct DCTStatStruc *pDCTstat;
-			pDCTstat = pDCTstatA + Node;
+		if (is_fam15h()) {
+			printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
+			for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+				struct DCTStatStruc *pDCTstat;
+				pDCTstat = pDCTstatA + Node;
 
-			mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
+				mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
+			}
 		}
 	} else {
 		NodesWmem = 0;
@@ -1675,14 +1811,14 @@ restartinit:
 
 			printk(BIOS_DEBUG, "mctAutoInitMCT_D: UMAMemTyping_D\n");
 			UMAMemTyping_D(pMCTstat, pDCTstatA);	/* Fix up for UMA sizing */
-		}
 
-		printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
-		for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
-			struct DCTStatStruc *pDCTstat;
-			pDCTstat = pDCTstatA + Node;
+			printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
+			for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+				struct DCTStatStruc *pDCTstat;
+				pDCTstat = pDCTstatA + Node;
 
-			mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
+				mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
+			}
 		}
 
 		if (is_fam15h()) {
@@ -2719,6 +2855,10 @@ static void DCTFinalInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *p
 		dword = 1 << DisDramInterface;
 		Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword);
 
+		dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x90);
+		dword &= ~(1 << ParEn);
+		Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x90, dword);
+
 		/* To maximize power savings when DisDramInterface=1b,
 		 * all of the MemClkDis bits should also be set.
 		 */
@@ -3602,7 +3742,9 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
 		dword++;
 	}
 
-	if (!(Status & (1 << SB_Registered)))
+	if (Status & (1 << SB_Registered))
+		DramConfigLo |= 1 << ParEn;		/* Registered DIMMs */
+	else
 		DramConfigLo |= 1 << UnBuffDimm;	/* Unbuffered DIMMs */
 
 	if (mctGet_NVbits(NV_ECC_CAP))
@@ -4091,6 +4233,9 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
 			if (status >= 0) { /* SPD access is ok */
 				pDCTstat->DIMMPresent |= 1 << i;
 				read_spd_bytes(pMCTstat, pDCTstat, i);
+#ifdef DEBUG_DIMM_SPD
+				dump_spd_bytes(pMCTstat, pDCTstat, i);
+#endif
 				crc_status = crcCheck(pDCTstat, i);
 				if (!crc_status) {
 					/* Try again in case there was a transient glitch */
@@ -4390,6 +4535,10 @@ static void mct_initDCT(struct MCTStatStruc *pMCTstat,
 				val = 1 << DisDramInterface;
 				Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
 
+				val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+				val &= ~(1 << ParEn);
+				Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val);
+
 				/* To maximize power savings when DisDramInterface=1b,
 				 * all of the MemClkDis bits should also be set.
 				 */
@@ -4542,8 +4691,9 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat,
 	}
 	for (i=i_start; i<i_end; i++) {
 		index_reg = 0x98;
-		Set_NB32_index_wait_DCT(dev, i, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A/B Output Driver Compensation Control */
-		Set_NB32_index_wait_DCT(dev, i, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A/B Output Driver Compensation Control */
+		Set_NB32_index_wait_DCT(dev, i, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]);	/* Channel A/B Output Driver Compensation Control */
+		Set_NB32_index_wait_DCT(dev, i, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]);	/* Channel A/B Output Driver Compensation Control */
+		printk(BIOS_SPEW, "Programmed DCT %d timing/termination pattern %08x %08x\n", dct, pDCTstat->CH_ADDR_TMG[i], pDCTstat->CH_ODC_CTL[i]);
 	}
 
 	return pDCTstat->ErrCode;
@@ -4600,11 +4750,19 @@ static u8 mct_SPDCalcWidth(struct MCTStatStruc *pMCTstat,
 		val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94);
 		val |= 1 << DisDramInterface;
 		Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val);
+
+		val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90);
+		val &= ~(1 << ParEn);
+		Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, val);
 	}
 	if (pDCTstat->DIMMValidDCT[1] == 0) {
 		val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94);
 		val |= 1 << DisDramInterface;
 		Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val);
+
+		val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90);
+		val &= ~(1 << ParEn);
+		Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val);
 	}
 
 	printk(BIOS_DEBUG, "SPDCalcWidth: Status %x\n", pDCTstat->Status);
@@ -6035,6 +6193,8 @@ static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat,
 		dword &= ~(0xf);				/* RdOdtTrnOnDly = read_odt_delay */
 		dword |= (read_odt_delay & 0xf);
 		Set_NB32_DCT(dev, dct, 0x240, dword);
+
+		printk(BIOS_SPEW, "Programmed ODT pattern %08x %08x %08x %08x\n", odt_pattern_0, odt_pattern_1, odt_pattern_2, odt_pattern_3);
 	} else if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
 		if (pDCTstat->Speed == 3)
 			dword = 0x00000800;
@@ -6170,6 +6330,8 @@ static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat,
 			Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x181, odt_pattern_0);
 			Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x182, odt_pattern_3);
 			Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x183, odt_pattern_2);
+
+			printk(BIOS_SPEW, "Programmed ODT pattern %08x %08x %08x %08x\n", odt_pattern_0, odt_pattern_1, odt_pattern_2, odt_pattern_3);
 		}
 	}
 
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
index 3df262b..4ae1aec 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c
@@ -21,7 +21,7 @@
 /* AM3/ASB2/C32/G34 DDR3 */
 
 static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
-				u32 *AddrTmgCTL, u32 *ODC_CTL,
+				u32 *ODC_CTL,
 				u8 *CMDmode);
 
 void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat,
@@ -34,9 +34,14 @@ void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat,
 	} else {
 		Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed,
 					pDCTstat->MAload[dct],
-					&(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]),
+					&(pDCTstat->CH_ODC_CTL[dct]),
 					&pDCTstat->_2Tmode);
 
+		if (pDCTstat->Status & (1 << SB_Registered)) {
+			pDCTstat->_2Tmode = 1;	/* Disable slow access mode */
+		}
+		pDCTstat->CH_ADDR_TMG[dct] = fam10h_address_timing_compensation_code(pDCTstat, dct);
+
 		pDCTstat->CH_ODC_CTL[dct] |= 0x20000000;	/* 60ohms */
 	}
 
@@ -54,42 +59,25 @@ void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat,
  *    : ODC_CTL    - Output Driver Compensation Control Register Value
  *    : CMDmode    - CMD mode
  */
-static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload,
-				u32 *AddrTmgCTL, u32 *ODC_CTL,
+static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
+				u32 *ODC_CTL,
 				u8 *CMDmode)
 {
-	*AddrTmgCTL = 0;
 	*ODC_CTL = 0;
 	*CMDmode = 1;
 
-	if(MAAdimms == 1) {
-		if(MAAload >= 16) {
-			if(Speed == 4)
-				*AddrTmgCTL = 0x003B0000;
-			else if (Speed == 5)
-				*AddrTmgCTL = 0x00380000;
-			else if (Speed == 6)
-				*AddrTmgCTL = 0x00360000;
-			else
-				*AddrTmgCTL = 0x00340000;
-		} else {
-			*AddrTmgCTL = 0x00000000;
-		}
+	if (MAAdimms == 1) {
 		*ODC_CTL = 0x00113222;
 		*CMDmode = 1;
 	} else /* if(MAAdimms == 0) */ {
 		if(Speed == 4) {
 			*CMDmode = 1;
-			*AddrTmgCTL = 0x00390039;
 		} else if(Speed == 5) {
 			*CMDmode = 1;
-			*AddrTmgCTL = 0x00350037;
 		} else if(Speed == 6) {
 			*CMDmode = 2;
-			*AddrTmgCTL = 0x00000035;
 		} else {
 			*CMDmode = 2;
-			*AddrTmgCTL = 0x00000033;
 		}
 		*ODC_CTL = 0x00223323;
 	}
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index b0ad54b..36e9858 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -241,37 +241,53 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat,
 				u16 like, u8 scale, u8 ChipSel)
 {
-	u8 DQSDelay0, DQSDelay1;
-	u16 DQSDelay;
+	uint8_t DQSDelay0, DQSDelay1;
+	int16_t delay_differential;
+	uint16_t DQSDelay;
 
 	if (pDCTstat->Status & (1 << SB_Registered)) {
-		return;
-	}
+		pDCTstat->ByteLane = 0x2;
+		GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
+		DQSDelay0 = pDCTstat->DQSDelay;
 
-	pDCTstat->ByteLane = like & 0xff;
-	GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
-	DQSDelay0 = pDCTstat->DQSDelay;
+		pDCTstat->ByteLane = 0x3;
+		GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
+		DQSDelay1 = pDCTstat->DQSDelay;
 
-	pDCTstat->ByteLane = (like >> 8) & 0xff;
-	GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
-	DQSDelay1 = pDCTstat->DQSDelay;
+		if (pDCTstat->Direction == DQS_READDIR) {
+			DQSDelay = DQSDelay1;
+		} else {
+			delay_differential = (int16_t)DQSDelay1 - (int16_t)DQSDelay0;
+			delay_differential += (int16_t)DQSDelay1;
 
-	if (DQSDelay0>DQSDelay1) {
-		DQSDelay = DQSDelay0 - DQSDelay1;
+			DQSDelay = delay_differential;
+		}
 	} else {
-		DQSDelay = DQSDelay1 - DQSDelay0;
-	}
+		pDCTstat->ByteLane = like & 0xff;
+		GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
+		DQSDelay0 = pDCTstat->DQSDelay;
 
-	DQSDelay = DQSDelay * (~scale);
+		pDCTstat->ByteLane = (like >> 8) & 0xff;
+		GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
+		DQSDelay1 = pDCTstat->DQSDelay;
 
-	DQSDelay += 0x80;	/* round it */
+		if (DQSDelay0>DQSDelay1) {
+			DQSDelay = DQSDelay0 - DQSDelay1;
+		} else {
+			DQSDelay = DQSDelay1 - DQSDelay0;
+		}
 
-	DQSDelay >>= 8;		/* 256 */
+		DQSDelay = DQSDelay * (~scale);
 
-	if (DQSDelay0>DQSDelay1) {
-		DQSDelay = DQSDelay1 - DQSDelay;
-	} else {
-		DQSDelay += DQSDelay1;
+		DQSDelay += 0x80;	/* round it */
+
+		DQSDelay >>= 8;		/* 256 */
+
+		if (DQSDelay0>DQSDelay1) {
+			DQSDelay = DQSDelay1 - DQSDelay;
+		} else {
+			DQSDelay += DQSDelay1;
+		}
 	}
 
 	pDCTstat->DQSDelay = (u8)DQSDelay;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
index 5ea7fa6..9617f84 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c
@@ -18,12 +18,39 @@
  * Foundation, Inc.
  */
 
+static uint16_t memclk_to_freq(uint16_t memclk) {
+	static const uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800};
+	static const uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
+	/* Map a memory clock code to its frequency table entry (presumably MHz); 0 means no entry */
+	uint16_t mem_freq = 0;
+	/* Fam15h codes index their table directly; Fam10h codes are offset by one. Bounds follow the table sizes. */
+	if (is_fam15h()) {
+		if (memclk < sizeof(fam15h_freq_tab) / sizeof(fam15h_freq_tab[0])) {
+			mem_freq = fam15h_freq_tab[memclk];
+		}
+	} else {
+		if ((memclk > 0x0) && (memclk <= sizeof(fam10h_freq_tab) / sizeof(fam10h_freq_tab[0]))) {
+			mem_freq = fam10h_freq_tab[memclk - 1];
+		}
+	}
+
+	return mem_freq;
+}
+
+static uint32_t rc_word_value_to_ctl_bits(uint32_t value) {
+	return ((value & 0xc) << 14) | ((value & 0x3) << 3);	/* value[3:2] -> result[17:16], value[1:0] -> result[4:3] */
+}
+
 static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
 			struct DCTStatStruc *pDCTstat, u32 MrsChipSel, u32 CtrlWordNum)
 {
-	u8 Dimms, DimmNum, MaxDimm, Speed;
+	u8 Dimms, DimmNum;
 	u32 val;
 	u32 dct = 0;
+	uint8_t ddr_voltage_index;
+	uint16_t mem_freq;
+	uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+	uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH);
 
 	DimmNum = (MrsChipSel >> 20) & 0xFE;
 
@@ -32,54 +59,64 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat,
 	/* DimmNum ++; */
 	/* cl +=8; */
 
-	MaxDimm = mctGet_NVbits(NV_MAX_DIMMS);
-	Speed = pDCTstat->DIMMAutoSpeed;
+	mem_freq = memclk_to_freq(pDCTstat->DIMMAutoSpeed);
 
 	if (pDCTstat->CSPresent_DCT[0] > 0) {
 		dct = 0;
-	} else if (pDCTstat->CSPresent_DCT[1] > 0 ){
+	} else if (pDCTstat->CSPresent_DCT[1] > 0 ) {
 		dct = 1;
-		DimmNum ++;
+		DimmNum++;
 	}
 	Dimms = pDCTstat->MAdimms[dct];
 
+	ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);
+
 	val = 0;
 	if (CtrlWordNum == 0)
-		val |= 1 << 1;
+		val = 0x2;
 	else if (CtrlWordNum == 1) {
 		if (!((pDCTstat->DimmDRPresent | pDCTstat->DimmQRPresent) & (1 << DimmNum)))
-			val |= 0xC; /* if single rank, set DBA1 and DBA0 */
+			val = 0xC; /* if single rank, set DBA1 and DBA0 */
 	} else if (CtrlWordNum == 2) {
-		if (MaxDimm == 4) {
-			if (Speed == 4) {
-				if (((pDCTstat->DimmQRPresent & (1 << DimmNum)) && (Dimms == 1)) || (Dimms == 2))
-					if (!(pDCTstat->MirrPresU_NumRegR & (1 << DimmNum)))
-						val |= 1 << 2;
-			} else {
-				if (pDCTstat->MirrPresU_NumRegR & (1 << DimmNum))
-					val |= 1 << 2;
+		if (package_type == PT_GR) {
+			/* Socket G34 */
+			if (MaxDimmsInstallable == 2) {
+				if (Dimms > 1)
+					val = 0x4;
 			}
-		} else {
-			if (Dimms > 1)
-				val |= 1 << 2;
 		}
 	} else if (CtrlWordNum == 3) {
-		val |= (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xFF;
+		val = (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xFF;
 	} else if (CtrlWordNum == 4) {
-		val |= (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xFF;
+		val = (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xFF;
 	} else if (CtrlWordNum == 5) {
-		val |= (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xFF;
+		val = (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xFF;
 	} else if (CtrlWordNum == 8) {
-		if (MaxDimm == 4)
-			if (Speed == 4)
-				if (pDCTstat->MirrPresU_NumRegR & (1 << DimmNum))
-					val |= 1 << 2;
+		if (package_type == PT_GR) {
+			/* Socket G34 */
+			if (MaxDimmsInstallable == 2) {
+				val = 0x0;
+			}
+		}
 	} else if (CtrlWordNum == 9) {
-		val |= 0xD;	/* DBA1, DBA0, DA3 = 0 */
+		val = 0xD;	/* DBA1, DBA0, DA3 = 0 */
+	} else if (CtrlWordNum == 10) {
+		val = 0x0;	/* Lowest operating frequency */
+	} else if (CtrlWordNum == 11) {
+		if (ddr_voltage_index & 0x4)
+			val = 0x2;	/* 1.25V */
+		else if (ddr_voltage_index & 0x2)
+			val = 0x1;	/* 1.35V */
+		else
+			val = 0x0;	/* 1.5V */
+	} else if (CtrlWordNum >= 12) {
+		val = 0x0;	/* Unset */
 	}
-	val &= 0xffffff0f;
+	val &= 0xf;
+
+	printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", CtrlWordNum, val);
 
-	val = MrsChipSel | ((val >> 2) & 3) << 16 | ((val & 3) << 3);
+	val = MrsChipSel | rc_word_value_to_ctl_bits(val);
 
 	/* transfer Control word number to address [BA2,A2,A1,A0] */
 	if (CtrlWordNum > 7) {
@@ -129,18 +166,18 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat,
 			val &= ~(0xF << 8);
 
 			switch (MrsChipSel) {
-			case 0:
-			case 1:
-				val |= 3 << 8;
-			case 2:
-			case 3:
-				val |= (3 << 2) << 8;
-			case 4:
-			case 5:
-				val |= (3 << 4) << 8;
-			case 6:
-			case 7:
-				val |= (3 << 6) << 8;
+				case 0:
+				case 1:
+					val |= 3 << 8;
+				case 2:
+				case 3:
+					val |= (3 << 2) << 8;
+				case 4:
+				case 5:
+					val |= (3 << 4) << 8;
+				case 6:
+				case 7:
+					val |= (3 << 6) << 8;
 			}
 			Set_NB32_DCT(dev, dct, 0xa8, val);
 
@@ -164,8 +201,10 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
 	u32 MrsChipSel;
 	u32 dev = pDCTstat->dev_dct;
 	u32 val;
+	uint16_t mem_freq;
 
 	pDCTstat->DIMMAutoSpeed = pDCTstat->TargetFreq;
+	mem_freq = memclk_to_freq(pDCTstat->TargetFreq);
 	for (MrsChipSel=0; MrsChipSel < 8; MrsChipSel++, MrsChipSel++) {
 		if (pDCTstat->CSPresent & (1 << MrsChipSel)) {
 			/* 2. Program F2x[1, 0]A8[CtrlWordCS]=bit mask for target chip selects. */
@@ -175,19 +214,31 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat,
 			Set_NB32_DCT(dev, 0, 0xA8, val); /* TODO: dct 0 / 1 select */
 
 			/* Resend control word 10 */
+			uint8_t freq_ctl_val = 0;
 			mct_Wait(1600);
-			switch (pDCTstat->TargetFreq) {
-			case 5:
-				mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x4000A);
-				break;
-			case 6:
-				mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x40012);
-				break;
-			case 7:
-				mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x4001A);
-				break;
+			switch (mem_freq) {
+				case 333:
+				case 400:
+					freq_ctl_val = 0x0;
+					break;
+				case 533:
+					freq_ctl_val = 0x1;
+					break;
+				case 667:
+					freq_ctl_val = 0x2;
+					break;
+				case 800:
+					freq_ctl_val = 0x3;
+					break;
+				case 933:
+					freq_ctl_val = 0x4;
+					break;
 			}
 
+			printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", 10, freq_ctl_val);
+
+			mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x40002 | rc_word_value_to_ctl_bits(freq_ctl_val));
+
 			mct_Wait(1600);
 
 			/* Resend control word 2 */
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
index 6a2c2a7..9ccf77e 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
@@ -496,6 +496,8 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
 		ret |= ((dword >> 10) & 3) << 9;
 	}
 
+	printk(BIOS_SPEW, "Going to send MR2 control word %08x\n", ret);
+
 	return ret;
 }
 
@@ -525,6 +527,8 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat,
 		ret |= (dword >> 24) & 7;
 	}
 
+	printk(BIOS_SPEW, "Going to send MR3 control word %08x\n", ret);
+
 	return ret;
 }
 
@@ -619,6 +623,8 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
 			ret |= 1 << 12;
 	}
 
+	printk(BIOS_SPEW, "Going to send MR1 control word %08x\n", ret);
+
 	return ret;
 }
 
@@ -738,6 +744,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
 		ret |= 1 << 8;
 	}
 
+	printk(BIOS_SPEW, "Going to send MR0 control word %08x\n", ret);
+
 	return ret;
 }
 
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
index 9313673..981f467 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
@@ -1744,6 +1744,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 	u16 EccDQSLike;
 	u8 EccDQSScale;
 	u32 val, val0, val1;
+	int16_t delay_differential;
 
 	EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
 	EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
@@ -1753,14 +1754,22 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 			u16 *p;
 			p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
 
-			/* DQS Delay Value of Data Bytelane
-			 * most like ECC byte lane */
-			val0 = p[EccDQSLike & 0x07];
-			/* DQS Delay Value of Data Bytelane
-			 * 2nd most like ECC byte lane */
-			val1 = p[(EccDQSLike>>8) & 0x07];
+			if (pDCTstat->Status & (1 << SB_Registered)) {
+				val0 = p[0x2];
+				val1 = p[0x3];
+
+				delay_differential = (int16_t)val1 - (int16_t)val0;
+				delay_differential += (int16_t)val1;
+
+				val = delay_differential;
+			} else {
+				/* DQS Delay Value of Data Bytelane
+				 * most like ECC byte lane */
+				val0 = p[EccDQSLike & 0x07];
+				/* DQS Delay Value of Data Bytelane
+				 * 2nd most like ECC byte lane */
+				val1 = p[(EccDQSLike>>8) & 0x07];
 
-			if (!(pDCTstat->Status & (1 << SB_Registered))) {
 				if(val0 > val1) {
 					val = val0 - val1;
 				} else {
@@ -1775,9 +1784,6 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 				} else {
 					val += val0;
 				}
-			} else {
-				val = val1 - val0;
-				val += val1;
 			}
 
 			pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
index 47ad152..e5e4031 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
@@ -930,7 +930,9 @@ void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 		else if ((cs == 4) || (cs == 0))
 			WrLvOdt1 = (dword & 0xf);
 	} else {
-		if (pDCTData->Status[DCT_STATUS_REGISTERED] == 0) {
+		if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+			WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm);
+		} else {
 			if ((pDCTData->DctCSPresent & 0x05) == 0x05) {
 				WrLvOdt1 = 0x03;
 			} else if (bitTest((u32)pDCTData->DctCSPresent,(u8)(dimm*2+1))) {
@@ -938,14 +940,14 @@ void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 			} else {
 				WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm);
 			}
-		} else {
-			WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm);
 		}
 	}
 
 	set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT,
 			DRAM_ADD_DCT_PHY_CONTROL_REG, 8, 11, (u32)WrLvOdt1);
 
+	printk(BIOS_SPEW, "Programmed DCT %d write levelling ODT pattern %08x\n", dct, WrLvOdt1);
+
 }
 
 #ifdef UNUSED_CODE
@@ -980,7 +982,7 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 	u16 Addl_Data_Offset, Addl_Data_Port;
 	sMCTStruct *pMCTData = pDCTstat->C_MCTPtr;
 	sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
-	u16 fam10h_freq_tab[] = {400, 533, 667, 800};
+	uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800};
 	uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
 
 	if (is_fam15h()) {
@@ -1093,21 +1095,18 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 				pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
 			}
 		} else {
-			if (pDCTData->Status[DCT_STATUS_REGISTERED])
-			{
-				if(pDCTData->RegMan1Present & ((1<<(dimm*2+dct))))
-				{
+			if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+				uint8_t AddrCmdPrelaunch = 0;		/* TODO: Fetch the correct value from RC2[0] */
+
+				/* The seed values below assume Pass 1 utilizes a 400MHz clock frequency (DDR3-800) */
+				if (AddrCmdPrelaunch == 0) {
 					Seed_Gross = 0x02;
-					Seed_Fine = 0x16;
-				}
-				else
-				{
+					Seed_Fine = 0x01;
+				} else {
 					Seed_Gross = 0x02;
-					Seed_Fine = 0x00;
+					Seed_Fine = 0x11;
 				}
-			}
-			else
-			{
+			} else {
 				if (MemClkFreq == 6) {
 					/* DDR-800 */
 					Seed_Gross = 0x00;
@@ -1131,6 +1130,7 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 			 */
 			pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
 			pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+			printk(BIOS_SPEW, "\tLane %02x initial seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
 		}
 	} else {
 		/* Pass 2 */
@@ -1182,21 +1182,30 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 
 				pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross;
 				pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+
+				printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
 			}
 		} else {
-			u32 RegisterDelay, SeedTotal;
+			uint32_t RegisterDelay;
+			uint32_t SeedTotalPreScaling;
+			uint32_t SeedTotal;
+			uint8_t AddrCmdPrelaunch = 0;		/* TODO: Fetch the correct value from RC2[0] */
 			for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
 			{
-				if (pDCTData->Status[DCT_STATUS_REGISTERED])
-					RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
-				else
+				if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+					if (AddrCmdPrelaunch == 0)
+						RegisterDelay = 0x20;
+					else
+						RegisterDelay = 0x30;
+				} else {
 					RegisterDelay = 0;
-				SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
-					(pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
+				}
+				SeedTotalPreScaling = ((pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+					(pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5)) - RegisterDelay;
 				/* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
 				training) - RegisterDelay. */
-				SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
-									fam10h_freq_tab[MemClkFreq-3] * 100) / (fam10h_freq_tab[0] * 100)));
+				SeedTotal = (uint16_t) ((((uint64_t) SeedTotalPreScaling) *
+									fam10h_freq_tab[MemClkFreq] * 100) / (fam10h_freq_tab[3] * 100));
 				Seed_Gross = SeedTotal / 32;
 				Seed_Fine = SeedTotal & 0x1f;
 				if (Seed_Gross == 0)
@@ -1205,8 +1214,20 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui
 					Seed_Gross = 1;
 				else
 					Seed_Gross = 2;
+
+				/* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems
+				 * due to the long register delays causing premature total delay wrap-around.
+				 * Attempt to work around this...
+				 */
+				SeedTotal = ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f);
+				SeedTotal += RegisterDelay;
+				Seed_Gross = SeedTotal / 32;
+				Seed_Fine = SeedTotal & 0x1f;
+
 				pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
 				pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+
+				printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f));
 			}
 		}
 	}
@@ -1383,6 +1404,8 @@ void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8
 	gross = get_ADD_DCT_Bits(pDCTData, dct, pDCTData->NodeId,
 				FUN_DCT, (u16)addr, grossStartLoc, grossEndLoc);
 
+	printk(BIOS_SPEW, "\tLane %02x raw readback: %04x\n", ByteLane, ((gross & 0x1f) << 5) | (fine & 0x1f));
+
 	if (!is_fam15h()) {
 		/* Adjust seed gross delay overflow (greater than 3):
 		 * - Adjust the trained gross delay to the original seed gross delay.
@@ -1406,4 +1429,5 @@ void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8
 	}
 	pDCTData->WLFineDelay[index+ByteLane] = (u8)fine;
 	pDCTData->WLGrossDelay[index+ByteLane] = (u8)gross;
+	printk(BIOS_SPEW, "\tLane %02x final adjusted value: %04x\n", ByteLane, ((gross & 0x1f) << 5) | (fine & 0x1f));
 }
-- 
1.7.9.5

